Spatial interpolation

The contents will appear later

This section will contain hands-on material that will appear later.

import geopandas as gpd
import tobler
import pyinterpolate
import numpy as np

from libpysal import graph
from sklearn import neighbors
from scipy import interpolate

Area interpolation

simd = gpd.read_file("data/edinburgh_simd_2020.gpkg")
simd.head(2)
DataZone DZName LAName SAPE2017 WAPE2017 Rankv2 Quintilev2 Decilev2 Vigintilv2 Percentv2 ... CrimeRate CrimeRank HouseNumOC HouseNumNC HouseOCrat HouseNCrat HouseRank Shape_Leng Shape_Area geometry
0 S01008417 Balerno and Bonnington Village - 01 City of Edinburgh 708 397 5537 4 8 16 80 ... 86 5392.0 17 8 2% 1% 6350.0 20191.721420 1.029993e+07 POLYGON ((315157.369 666212.846, 315173.727 66...
1 S01008418 Balerno and Bonnington Village - 02 City of Edinburgh 691 378 6119 5 9 18 88 ... 103 5063.0 7 10 1% 1% 6650.0 25944.861787 2.357050e+07 POLYGON ((317816.000 666579.000, 318243.000 66...

2 rows × 52 columns

simd[["EmpRate", "geometry"]].explore("EmpRate", tiles="CartoDB Positron")
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
grid_8 = tobler.util.h3fy(simd, resolution=8)
grid_8.head(2)
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/pyproj/crs/crs.py:1293: UserWarning: You will likely lose important projection information when converting to a PROJ string from another format. See: https://proj.org/faq.html#what-is-the-best-format-for-describing-coordinate-reference-systems
  proj = self._crs.to_proj4(version=version)
geometry
hex_id
881972393dfffff POLYGON ((328193.671 671699.739, 327730.157 67...
8819727743fffff POLYGON ((318448.651 677311.606, 317984.690 67...
m = simd.boundary.explore(tiles="CartoDB Positron")
grid_8.boundary.explore(m=m, color="red")
Make this Notebook Trusted to load map: File -> Trust Notebook

overlay

interpolated = tobler.area_weighted.area_interpolate(
    simd,
    grid_8,
    extensive_variables=["EmpNumDep", "IncNumDep"],
    intensive_variables=["EmpRate", "IncRate"],
)
interpolated.explore("EmpRate", tiles="CartoDB Positron")
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook

Point interpolation

airbnb = gpd.read_file("data/edinburgh_airbnb_2023.gpkg")
airbnb.head()
id bedrooms property_type price geometry
0 15420 1.0 Entire rental unit $126.00 POINT (325921.137 674478.931)
1 790170 2.0 Entire condo $269.00 POINT (325976.360 677655.252)
2 24288 2.0 Entire loft $95.00 POINT (326069.186 673072.913)
3 821573 2.0 Entire rental unit $172.00 POINT (326748.646 674001.683)
4 822829 3.0 Entire rental unit $361.00 POINT (325691.831 674328.127)
airbnb.price.head()
0    $126.00
1    $269.00
2     $95.00
3    $172.00
4    $361.00
Name: price, dtype: object
airbnb["price_float"] = airbnb.price.str.strip("$").str.replace(",", "").astype(float)
two_bed_homes = airbnb[
    (airbnb["bedrooms"] == 2)
    & (airbnb["property_type"] == "Entire rental unit")
    & (airbnb["price_float"] < 300)
].copy()
two_bed_homes.head()
id bedrooms property_type price geometry price_float
3 821573 2.0 Entire rental unit $172.00 POINT (326748.646 674001.683) 172.0
5 834777 2.0 Entire rental unit $264.00 POINT (324950.724 673875.598) 264.0
6 450745 2.0 Entire rental unit $177.00 POINT (326493.725 672853.904) 177.0
10 485856 2.0 Entire rental unit $157.00 POINT (326597.124 673869.551) 157.0
17 51505 2.0 Entire rental unit $155.00 POINT (325393.807 674177.409) 155.0
two_bed_homes.geometry.duplicated().any()
True
two_bed_homes = two_bed_homes.drop_duplicates("geometry")
two_bed_homes.explore("price_float", tiles="CartoDB Positron")
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
from libpysal import graph

knn = graph.Graph.build_kernel(two_bed_homes, k=10).transform("r")
two_bed_homes["price_lag"] = knn.lag(two_bed_homes.price_float)
two_bed_homes.explore("price_lag", tiles="CartoDB Positron")
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook

Nearest

grid_10 = tobler.util.h3fy(simd, resolution=10)
len(grid_10)
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/pyproj/crs/crs.py:1293: UserWarning: You will likely lose important projection information when converting to a PROJ string from another format. See: https://proj.org/faq.html#what-is-the-best-format-for-describing-coordinate-reference-systems
  proj = self._crs.to_proj4(version=version)
20162
grid_coordinates = grid_10.centroid.get_coordinates()
grid_coordinates.head()
x y
hex_id
8a1972766707fff 321343.980521 675606.747254
8a197275cdb7fff 315877.854512 668791.639689
8a197239ad17fff 328805.423248 667597.536876
8a1972742537fff 316429.948928 670257.054100
8a1972740197fff 319126.996682 670074.407673
coords = two_bed_homes.get_coordinates()
coords.head()
x y
3 326748.645636 674001.683211
5 324950.723888 673875.598033
6 326493.725178 672853.903917
10 326597.123684 673869.551295
17 325393.807222 674177.408961
nearest = interpolate.griddata(
    coords,
    two_bed_homes.price_lag,
    grid_coordinates,
    method="nearest",
)
nearest
array([189.        , 181.        ,  86.        , ..., 188.70398583,
       188.70398583,  84.        ])
grid_10["nearest"] = nearest
_ = grid_10.plot('nearest', legend=True)
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/plotting.py:732: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(values.dtype):

caption here
ax = grid_10.plot('nearest', legend=True)
two_bed_homes.plot(ax=ax, color="red", markersize=1)
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/plotting.py:732: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(values.dtype):
<Axes: >

caption here

K-Nearest neighbours regression

Uniform

interpolation_uniform = neighbors.KNeighborsRegressor(n_neighbors=10, weights="uniform")
interpolation_uniform.fit(
    coords, two_bed_homes.price_lag
)
KNeighborsRegressor(n_neighbors=10)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
price_on_grid = interpolation_uniform.predict(grid_coordinates)
price_on_grid
array([126.85397127, 100.56773529, 145.41599385, ..., 120.36697169,
       120.36697169, 120.36697169])
grid_10["knn_uniform"] = price_on_grid
_ = grid_10.plot("knn_uniform", legend=True)
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/plotting.py:732: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(values.dtype):

caption here

Distance-weighted

interpolation_distance = neighbors.KNeighborsRegressor(n_neighbors=10, weights="distance")
interpolation_distance.fit(
    coords, two_bed_homes.price_lag
)
KNeighborsRegressor(n_neighbors=10, weights='distance')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
grid_10["knn_distance"] = interpolation_distance.predict(grid_coordinates)
_ = grid_10.plot("knn_distance", legend=True)
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/plotting.py:732: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(values.dtype):

caption here

Distance band regression

interpolation_radius = neighbors.RadiusNeighborsRegressor(radius=1000, weights="distance")
interpolation_radius.fit(
    coords, two_bed_homes.price_lag
)
RadiusNeighborsRegressor(radius=1000, weights='distance')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
grid_10["radius"] = interpolation_radius.predict(grid_coordinates)
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/sklearn/neighbors/_regression.py:500: UserWarning: One or more samples have no neighbors within specified radius; predicting NaN.
  warnings.warn(empty_warning_msg)
_ = grid_10.plot("radius", legend=True, missing_kwds={'color': 'lightgrey'})
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/plotting.py:732: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(values.dtype):

caption here

Ordinary Kriging

input_data = np.hstack([coords, two_bed_homes.price_lag.values.reshape(-1, 1)])
input_data
array([[3.26748646e+05, 6.74001683e+05, 2.07033397e+02],
       [3.24950724e+05, 6.73875598e+05, 1.54805935e+02],
       [3.26493725e+05, 6.72853904e+05, 1.43865293e+02],
       ...,
       [3.28513265e+05, 6.74048892e+05, 1.06875409e+02],
       [3.26840903e+05, 6.74767224e+05, 1.68848108e+02],
       [3.25415664e+05, 6.73345158e+05, 2.15847334e+02]])
step_radius = 100  # meters
max_range = 5000  # meters

exp_semivar = pyinterpolate.build_experimental_variogram(
    input_array=input_data,
    step_size=step_radius,
    max_range=max_range,
)
exp_semivar.plot()

caption here
semivar = pyinterpolate.build_theoretical_variogram(
    experimental_variogram=exp_semivar,
    model_name='linear',
    sill=exp_semivar.variance,
    rang=5000
)
semivar.plot()

caption here
ordinary_kriging = pyinterpolate.kriging(
    observations=input_data,
    theoretical_model=semivar,
    points=grid_coordinates.values,
    no_neighbors=10,
    how="ok",
    show_progress_bar=False,
)
grid_10["ordinary_kriging"] = ordinary_kriging[:, 0]
_ = grid_10.plot("ordinary_kriging", legend=True)
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/plotting.py:732: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(values.dtype):

caption here
grid_10["variance_error"] = ordinary_kriging[:, 1]
_ = grid_10.plot("variance_error", legend=True)
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/plotting.py:732: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(values.dtype):

caption here

Use larger distance

semivar_larger = pyinterpolate.build_theoretical_variogram(
    experimental_variogram=exp_semivar,
    model_name='linear',
    sill=exp_semivar.variance,
    rang=15000,
)
ordinary_kriging_l = pyinterpolate.kriging(
    observations=input_data,
    theoretical_model=semivar_larger,
    points=grid_coordinates.values,
    no_neighbors=10,
    how="ok",
    show_progress_bar=False,
)
grid_10["ok_larger"] = ordinary_kriging_l[:, 0]
_ = grid_10.plot("ok_larger", legend=True)
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/plotting.py:732: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(values.dtype):

caption here
grid_10["variance_error_l"] = ordinary_kriging_l[:, 1]
_ = grid_10.plot("variance_error_l", legend=True)
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/plotting.py:732: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(values.dtype):

caption here